/* Close any log left open by an earlier run, then open this script's log. */
capture log close
log using "${logpath}clean_ridelevel.log", replace

/*******************************************************************************
clean_ridelevel.do

This code takes the ride-level claims panel and cleans it.
*******************************************************************************/

clear all

qui {
	/* Read only the variables used below, restricted to year>=2003. */
	use ambulance_id FROM TO hcpcs_code NPI month year usrds_id COUNTY STATE tot_pmt ///
		using "${rawdatapath}ridelevel_data_complete.dta" if year>=2003, clear

	/* Remove every ride whose ambulance_id occurs more than once. All copies
	   are dropped, not only the extras. The data are left sorted by
	   ambulance_id. */
	bysort ambulance_id: drop if _N > 1

	rename (FROM TO) (from to)

	/* Translate the one-letter origin (from) and destination (to) codes into
	   descriptive categories (orig_cat, dest_cat) and then into coarse groups
	   (orig_cat2, dest_cat2): Home, ESRD, Physician Office, Hospital, Other.
	   Any non-missing code without an explicit label becomes "Other".

	   Fix relative to the earlier version: the label for code P carried a
	   stray trailing apostrophe and the label for code X was spelled without
	   the apostrophe that the regrouping step tests for, so both codes fell
	   into "Other" in the coarse grouping. The labels now match the strings
	   tested below. The same logic is applied to both ends of the ride in a
	   single loop so the two mappings cannot drift apart. */
	foreach pair in "from orig" "to dest" {
		local code : word 1 of `pair'
		local stub : word 2 of `pair'

		gen `stub'_cat = "Diagnostic Site" if `code' == "D"
		replace `stub'_cat = "Residential Facility" if `code' == "E"
		replace `stub'_cat = "Hospital-Based ESRD Facility" if `code' == "G"
		replace `stub'_cat = "Hospital" if `code' == "H"
		replace `stub'_cat = "Transfer Site" if `code' == "I"
		replace `stub'_cat = "Freestanding ESRD Facility" if `code' == "J"
		replace `stub'_cat = "Skilled Nursing Facility" if `code' == "N"
		replace `stub'_cat = "Physician's Office" if `code' == "P"
		replace `stub'_cat = "Residence" if `code' == "R"
		replace `stub'_cat = "Scene of Acute Event" if `code' == "S"
		replace `stub'_cat = "Intermediate Stop at Physician's Office" if `code' == "X"
		replace `stub'_cat = "Other" if missing(`stub'_cat) & !missing(`code')

		gen `stub'_cat2 = `stub'_cat
		replace `stub'_cat2 = "Home" if inlist(`stub'_cat, "Residential Facility", "Skilled Nursing Facility", "Residence")
		replace `stub'_cat2 = "ESRD" if inlist(`stub'_cat, "Hospital-Based ESRD Facility", "Freestanding ESRD Facility")
		replace `stub'_cat2 = "Physician's Office" if `stub'_cat == "Intermediate Stop at Physician's Office"
		replace `stub'_cat2 = "Other" if !inlist(`stub'_cat2, "Home", "ESRD", "Physician's Office", "Hospital") & !missing(`stub'_cat2)
	}

	/* Service type from the HCPCS code. Codes not listed here leave als
	   missing. The value labels below give the meaning of each level. */
	gen als = 1 if inlist(hcpcs_code, "A0426", "A0427")
	replace als = 0 if inlist(hcpcs_code, "A0428", "A0429")
	replace als = 2 if hcpcs_code == "A0433"
	replace als = 3 if inlist(hcpcs_code, "A0430", "A0431")
	replace als = 4 if inlist(hcpcs_code, "A0432", "A0434")

	label define als_type ///
		0 "Basic Life Support" ///
		1 "Advanced Life Support, Level 1" ///
		2 "Advanced Life Support, Level 2" ///
		3 "Air Transport" ///
		4 "Other Ambulance Service"
	label values als als_type

	/* Emergency indicator, defined only for the four codes A0426-A0429;
	   every other code leaves it missing. */
	gen emergency = 0 if inlist(hcpcs_code, "A0426", "A0428")
	replace emergency = 1 if inlist(hcpcs_code, "A0427", "A0429")

	/* date is a daily date pinned to the first day of the ride month;
	   t is the corresponding monthly date. */
	gen date = mdy(month, 1, year)
	format date %td
	gen t = ym(year, month)
	/* Fix: the monthly format belongs on t. It was previously applied to date,
	   which overwrote the daily format on a daily-valued variable and left t
	   unformatted. */
	format t %tm

	/* Value labels on 1 January of selected years, so that date shows the
	   year at those values (each number is mdy(1,1,YYYY)). */
	label define dates 15341 "2002" 15706 "2003" 16071 "2004" 16802 "2006" 17532 "2008" 17898 "2009" 18263 "2010" 18993 "2012" 19724 "2014" 20089 "2015" 20454 "2016" 21185 "2018"
	label values date dates

	order usrds_id t date

	/* Prior authorization status.
	   priorauth_date groups states into 0 (neither group), 1 and 2.
	   prior_auth switches on from 1 Dec 2014 for group 1 and from
	   1 Jan 2016 for group 2.
	   NOTE(review): the state values look like FIPS codes - confirm against
	   the raw data documentation. */
	rename (COUNTY STATE) (county state)
	destring county state, replace

	gen priorauth_date = 0 if !missing(state)
	replace priorauth_date = 1 if inlist(state, 34, 42, 45)
	replace priorauth_date = 2 if inlist(state, 10, 11, 24, 37, 51, 54)

	gen prior_auth = 0 if !missing(state)
	replace prior_auth = 1 if priorauth_date == 1 & date >= mdy(12, 1, 2014)
	replace prior_auth = 1 if priorauth_date == 2 & date >= mdy(1, 1, 2016)

	/* Flag rides with als==0 and emergency==0 that run between Home and ESRD
	   in either direction: both ends are in {Home, ESRD} and they differ. */
	gen NE_hometodial = als == 0 & emergency == 0 ///
		& inlist(orig_cat2, "Home", "ESRD") ///
		& inlist(dest_cat2, "Home", "ESRD") ///
		& orig_cat2 != dest_cat2

	/* Payment amount on emergency rides only (missing otherwise). */
	gen emerg_pmt = tot_pmt if emergency == 1

	/*Merging to get districts*/
	/* County-level crosswalk first; rides that do not match keep a missing
	   district and fall back to the state-level crosswalk below. Crosswalk
	   rows with no rides are discarded. */
	merge m:1 county state using "${cleandatapath}DOJcounty_Xwalk.dta", keep(master match) nogenerate

	merge m:1 state using "${cleandatapath}DOJstate_Xwalk.dta", keep(master match) nogenerate
	replace district = statedistrict if missing(district)
	drop statedistrict

	/* Rides that still have no district are pooled under "MISSING" so that
	   encode assigns them a code. */
	replace district = "MISSING" if missing(district)
	encode district, gen(dist_code)

	/*Merging with DOJ data*/
	/* The NPI list uses a lower-case key, so rename for the merge and restore
	   afterwards. fraud marks rides whose NPI appears in that list. */
	sort NPI
	rename NPI npi
	merge m:1 npi using "${cleandatapath}Indicted_NPIs.dta", keep(master match)
	rename npi NPI
	gen fraud = (_merge == 3)
	drop _merge

	/* Fix: keep only rides (master rows), as in the merges above. Previously
	   district rows with no rides were left in memory; they have a missing
	   year, so they survived the year filter and were written into
	   ridelevel_emerg.dta as empty observations. */
	merge m:1 district using "${cleandatapath}DOJ_data_district.dta", keep(master match) nogenerate

	/* Ride-level extract: observations not dated before 2011, reduced to the
	   listed variables. */
	preserve
	keep if year >= 2011
	keep NE_hometodial emergency tot_pmt *cat2 dist_code district t prior_auth ///
		NPI state date month year
	save "${cleandatapath}ridelevel_emerg.dta", replace
	restore

	/* District-by-month counts and payments for emergency rides, held in a
	   tempfile that is merged back after the main collapse. */
	preserve
	keep if emergency == 1
	collapse (count) emerg_rides=ambulance_id (sum) emerg_pmt=tot_pmt, ///
		by(district dist_code date month year)
	tempfile emerg
	save `emerg', replace
	restore

	/* From here on the sample is restricted to rides flagged NE_hometodial. */
	keep if NE_hometodial == 1
}

/* Descriptive figures for the retained rides (output goes to the log). */
summarize ambulance_id // Referenced in Abstract and Section 1, Paragraph 2 and Section 3, Paragraph 1
egen total_payments = total(tot_pmt)
format total_payments %12.0g
list total_payments if _n == 1 // Referenced in Abstract and Section 1, Paragraph 2 and Section 3, Paragraph 1
codebook NPI // Referenced in Section 3, Paragraph 1
codebook usrds_id if fraud == 1 // Referenced in Section 6.1, Paragraph 4

qui {
	/* Rename the district-level DOJ case variables. */
	rename civ_backupdate first_civil
	rename crim_backupdate first_criminal
	rename civ_casecount civil_count
	rename crim_casecount criminal_count

	keep usrds_id t priorauth_date NPI date ambulance_id year month tot_pmt ///
		district dist_code prior_auth first_* *_count
	save "${cleandatapath}ridelevel.dta", replace

	/*Getting firm entry*/
	/* Within each NPI-district pair ordered by date, entry marks the first
	   observation and exit the last. All three flags are missing when NPI
	   is missing. */
	sort NPI dist_code date
	by NPI dist_code: gen entry = (_n == 1) if !missing(NPI)
	by NPI dist_code: gen exit = (_n == _N) if !missing(NPI)

	/* One flag per NPI per district-date, so that summing counts the
	   distinct NPIs with a ride in that district and month. */
	bysort date dist_code NPI: gen active_firms = (_n == 1) if !missing(NPI)

	/*District-level diff-in-diff of prior authorization and indictments*/
	/* Collapse to one row per district and month: means of the district-level
	   case variables and prior_auth, ride count, and sums of the firm flags
	   and payments. */
	collapse (mean) first_c* c*count prior_auth (count) ambulance_id ///
		(sum) active_firms tot_pmt entry exit, ///
		by(district dist_code date month year)

	/* Attach the emergency-ride aggregates saved earlier. */
	merge 1:1 district dist_code date month year using `emerg', nogenerate

	/* Drop the pooled unknown district, then build a full district-by-month
	   panel; tsfill adds rows for months with no observation. */
	drop if district == "MISSING"
	gen month_date = mofd(date)
	format month_date %tm
	tsset dist_code month_date
	tsfill, full

	/* Missing counts and payments become zeros. */
	foreach v of varlist ambulance_id emerg_rides tot_pmt emerg_pmt active_firms {
		replace `v' = 0 if missing(`v')
	}
	/* Firm counts are set to missing before January 2012. */
	replace active_firms = . if month_date < ym(2012, 1)

	/* log(x + 1) transforms */
	gen log_rides = log(ambulance_id + 1)
	gen log_emerg_rides = log(emerg_rides + 1)
	foreach v of varlist tot_pmt emerg_pmt active_firms {
		gen log_`v' = log(`v' + 1)
	}

	/* Inverse hyperbolic sine transforms */
	foreach pair in "pmt tot_pmt" "rides ambulance_id" "firms active_firms" {
		local stub : word 1 of `pair'
		local src : word 2 of `pair'
		gen ihs_`stub' = log(`src' + sqrt(`src'^2 + 1))
	}

	/* Rows without a prior_auth value are treated as untreated. */
	replace prior_auth = 0 if missing(prior_auth)
	/* NOTE(review): dist_code 16 is an encode-assigned number and depends on
	   the alphabetical position of the district name - confirm which district
	   this is meant to be. */
	replace prior_auth = 1 if dist_code == 16 & month_date >= ym(2016, 1)

	/* First treated date per district, spread to every row of the district;
	   post_PA is 0 where the district has no such date. */
	bysort dist_code prior_auth: egen PA_date_pre = min(date) if prior_auth == 1
	bysort dist_code: egen PA_date = mode(PA_date_pre)
	gen post_PA = month_date >= mofd(PA_date)

	/* Spread the district-level case dates and counts to every row of the
	   district, including rows added by tsfill; counts default to zero. */
	bysort dist_code: egen crim_date = mode(first_criminal)
	bysort dist_code: egen civ_date = mode(first_civil)
	foreach kind in crim civ {
		local src = cond("`kind'" == "crim", "criminal_count", "civil_count")
		bysort dist_code: egen `kind'_count = mode(`src')
		replace `kind'_count = 0 if missing(`kind'_count)
	}

	gen post_crim = month_date >= mofd(crim_date)
	gen post_civ = month_date >= mofd(civ_date)

	/* District name for every row (tsfill rows have an empty district). */
	bysort dist_code: egen dist_name = mode(district)
	/* Spillover definitions, keyed on district name.

	   Fixes relative to the earlier version:
	   - "Deleware" was misspelled; the correct spelling is now matched too.
	   - Arizona, Nevada and South Carolina were written as "AZ", "NV",
	     "South Carolina" in the civil list but "Arizona", "Nevada", "SC" in
	     the criminal list, so one list could never match each of them. Both
	     spellings are now accepted in both lists.
	   - A repeated "VAW" entry was removed from the criminal list.
	   The original spellings are kept, so any district that matched before
	   still matches.
	   NOTE(review): confirm the spellings actually used in the district
	   crosswalk and remove the ones that do not occur. */

	/* Districts flagged civil_spill_drop = 1 */
	gen civil_spill_drop = 0
	foreach d in ILN ILC IAS IAN WIW WIE INN "Massachusetts" "Rhode Island" ///
		"Connecticut" NYN "Vermont" "New Hampshire" ARE MOE MOW ARW MSN TNW ///
		NCE "South Carolina" SC NCM VAW VAE CAC CAS AZ "Arizona" NV "Nevada" ///
		CAE CAN ALN PAM TXS TNM KYE FLM GAM GAS NYS LAW MSS {
		replace civil_spill_drop = 1 if dist_name == "`d'"
	}

	/* Monthly date assigned to each listed district; 0 when not listed.
	   Later lines override earlier ones. */
	gen first_civil_spill = 0
	replace first_civil_spill = ym(2009,9) if inlist(dist_name, "TNW", "TNM", "TNE", "GAN", "ALM", "ALS", "MSN")
	replace first_civil_spill = ym(2011,1) if inlist(dist_name, "NCE", "NCM", "NCW", "GAM", "GAS")
	replace first_civil_spill = ym(2011,6) if inlist(dist_name, "Maryland", "PAE", "New Jersey", "NYS", "NYN", "NYW", "PAW")
	replace first_civil_spill = ym(2011,10) if inlist(dist_name, "TXW", "TXE")
	replace first_civil_spill = ym(2012,8) if inlist(dist_name, "KYW", "KYE")
	replace first_civil_spill = ym(2013,1) if inlist(dist_name, "INS", "OHS", "WVS", "VAW")
	replace first_civil_spill = ym(2015,3) if inlist(dist_name, "FLS", "FLN")
	replace first_civil_spill = ym(2017,11) if inlist(dist_name, "WVN", "District of Columbia")

	/* Districts flagged criminal_spill_drop = 1 */
	gen criminal_spill_drop = 0
	foreach d in ARE ARW MOW MOE TNW MSN MSS LAW CAC CAS "Arizona" AZ ///
		"Nevada" NV CAE CAN NCE SC "South Carolina" NCM VAW VAE TXE TXS TXN ///
		TNM PAE INN OHS "Guam" KYE {
		replace criminal_spill_drop = 1 if dist_name == "`d'"
	}

	/* Monthly date assigned to each listed district; 0 when not listed. */
	gen first_criminal_spill = 0
	replace first_criminal_spill = ym(2006,9) if inlist(dist_name, "TXS", "TXW", "TXN", "OKW", "OKE", "ARW", "LAW")
	replace first_criminal_spill = ym(2008,1) if inlist(dist_name, "WVN", "WVS", "KYE", "TNE", "NCW")
	replace first_criminal_spill = ym(2009,6) if dist_name == "New Mexico"
	replace first_criminal_spill = ym(2010,1) if inlist(dist_name, "KYW", "TNW", "MSN", "ALN")
	replace first_criminal_spill = ym(2011,2) if inlist(dist_name, "Maryland", "Delaware", "Deleware", "New Jersey", "PAM")
	replace first_criminal_spill = ym(2011,5) if inlist(dist_name, "Connecticut", "Massachusetts")
	replace first_criminal_spill = ym(2012,1) if inlist(dist_name, "NYS", "NYN", "NYW", "PAW")
	replace first_criminal_spill = ym(2012,11) if inlist(dist_name, "ILN", "ILC", "INS", "OHS", "OHN", "MIW")

	/* Monthly date for the prior authorization spillover; missing when the
	   district is not listed. */
	gen PA_date_spill = ym(2014,12) if inlist(dist_name, "NYE", "NYS", "NYN", "NYW", "OHN", "GAN", "GAM", "GAS")
	replace PA_date_spill = ym(2016,1) if inlist(dist_name, "OHS", "KYE", "TNE")

	save "${cleandatapath}dist_data.dta", replace
}

log close